package crawler.hanteng;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.alibaba.fastjson.JSON;
import com.zql.entity.AgencyEntity;

import util.HttpConnectionPost;
import util.MybatisTool;
import util.TianYanCha;

/**
 * Crawler that collects dealer (agency) records from the Hanteng Auto website.
 *
 * <p>The crawl walks province options, then the city options of each province,
 * then requests the dealer list for each province/city pair and converts every
 * dealer entry into an {@link AgencyEntity}.
 */
public class HanTengQiCheCrawler {
	/** Page whose first "select-box" element carries the province options. */
	private static final String PROVINCE_URL = "http://www.hantengauto.com/senver/index.html";

	/** Endpoint that is POSTed a province id and answers with city anchors. */
	private static final String CITY_URL = "http://www.hantengauto.com/front/ajax_getCategoryList.do";

	/** Endpoint that is POSTed province/city ids and answers with dealer JSON. */
	private static final String DEALER_URL = "http://www.hantengauto.com/front/ajax_getJingXiaoShangList.do";

	/**
	 * Matches the content of one bracketed entry that starts with a digit,
	 * i.e. the text between '[' and the next ']'. Compiled once because it is
	 * reused for every city response.
	 */
	private static final Pattern DEALER_ENTRY = Pattern.compile("(?<=\\[)\\d+.*?(?=\\])");

	/**
	 * Crawls every province and city offered by the site and returns all
	 * dealers found.
	 *
	 * @return the collected dealers; empty when no dealer was found
	 * @throws RuntimeException wrapping any exception raised while fetching or
	 *                          parsing the remote pages
	 */
	public List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		try {
			Document proDoc = Jsoup.connect(PROVINCE_URL).get();
			Element proSelect = proDoc.getElementsByClass("select-box").get(0);
			Elements proOptions = proSelect.getElementsByTag("dd").get(0).getElementsByTag("a");
			for (Element proOption : proOptions) {
				String proNum = proOption.attr("value");
				String proName = proOption.text();

				Document cityDoc = Jsoup.connect(CITY_URL).data("id", proNum).post();
				for (Element cityOption : cityDoc.getElementsByTag("a")) {
					String cityName = cityOption.text();
					String cityNum = cityOption.attr("value");
					// Keep iterating over all cities and provinces; the list is
					// only useful to the caller once the whole crawl has finished.
					list.addAll(fetchDealers(proNum, proName, cityNum, cityName));
				}
			}
		} catch (Exception e) {
			throw new RuntimeException("爬虫异常", e);
		}
		return list;
	}

	/**
	 * Requests the dealer list of one province/city pair and parses every
	 * dealer entry contained in the response.
	 *
	 * @param proNum   province id as read from the province option
	 * @param proName  province display name
	 * @param cityNum  city id as read from the city option
	 * @param cityName city display name
	 * @return the dealers of that city; empty when the response has no entries
	 * @throws Exception whatever the HTTP, JSON or shareholder helpers raise;
	 *                   their signatures are not visible here, so the broad
	 *                   type is propagated to the single catch in getAgency
	 */
	private List<AgencyEntity> fetchDealers(String proNum, String proName, String cityNum, String cityName)
			throws Exception {
		List<AgencyEntity> dealers = new ArrayList<AgencyEntity>();
		// The city id is sent as-is; prefixing it with a literal "-1" would
		// produce a value such as "-1123" instead of the selected city id.
		String param = "jingxiaoValue=&shengjiValue=" + proNum
				+ "&classJxsId=259532&flag=1&chengShiValue=" + cityNum;
		String dealerText = HttpConnectionPost.getJson(DEALER_URL, param);
		String dealerHtml = JSON.parseObject(dealerText).getString("listPoint");

		// getString yields null when the key is absent, so check before isEmpty().
		if (dealerHtml == null || dealerHtml.isEmpty()) {
			return dealers;
		}
		Matcher m = DEALER_ENTRY.matcher(dealerHtml);
		while (m.find()) {
			AgencyEntity agency = parseDealer(m.group(), proName, cityName);
			if (agency != null) {
				dealers.add(agency);
			}
		}
		return dealers;
	}

	/**
	 * Converts one bracketed dealer entry into an entity.
	 *
	 * <p>The entry is split on commas: the first two fields are used as
	 * longitude and latitude, the third as an HTML fragment whose first three
	 * child elements are read as name, address and sales phone.
	 *
	 * @param entry    the text between '[' and ']' of one dealer entry
	 * @param proName  province display name stored on the entity
	 * @param cityName city display name stored on the entity
	 * @return the populated entity, or {@code null} when the entry does not
	 *         have the expected fields (a warning is written to stderr)
	 * @throws Exception whatever the shareholder lookup raises
	 */
	private AgencyEntity parseDealer(String entry, String proName, String cityName) throws Exception {
		String[] dealerInfo = entry.split(",");
		if (dealerInfo.length < 3) {
			System.err.println("Skipping malformed dealer entry: " + entry);
			return null;
		}
		String lng = dealerInfo[0];
		String lat = dealerInfo[1];

		Document dealerDoc = Jsoup.parse(dealerInfo[2]);
		Elements items = dealerDoc.getElementsByTag("body").get(0).children();
		if (items.size() < 3) {
			System.err.println("Skipping dealer entry with incomplete details: " + entry);
			return null;
		}
		String name = items.get(0).text();
		String address = items.get(1).text();
		String sellTell = extractPhone(items.get(2).text());
		List<String> shareholder = TianYanCha.getShareholder(name);
		String sControllingShareholder = shareholder.get(0);
		String sOtherShareholders = shareholder.get(1);

		AgencyEntity agency = new AgencyEntity();
		agency.setdCloseDate(null);
		agency.setdOpeningDate(null);
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setnBrandID(-1);
		agency.setsBrand("汉腾");

		agency.setnDealerIDWeb(-1);
		agency.setnManufacturerID(-1);
		agency.setsManufacturer("汉腾汽车");

		agency.setnState(1);
		agency.setsAddress(address);
		agency.setsCity(cityName);
		agency.setsCustomerServiceCall(null);
		agency.setsDealerName(name);
		agency.setsDealerType(null);
		agency.setsProvince(proName);
		agency.setsSaleCall(sellTell);
		agency.setsLongitude(lng);
		agency.setsLatitude(lat);
		agency.setsControllingShareholder(sControllingShareholder);
		agency.setsOtherShareholders(sOtherShareholders);
		return agency;
	}

	/**
	 * Returns the second ':'-separated field of the given text.
	 *
	 * @param text the text of the phone element
	 * @return the field following the first ':', or {@code null} when there is
	 *         no such field
	 */
	private static String extractPhone(String text) {
		String[] parts = text.split(":");
		return parts.length > 1 ? parts[1] : null;
	}

	/**
	 * Runs the crawl and stores every dealer through {@link MybatisTool}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		HanTengQiCheCrawler crawler = new HanTengQiCheCrawler();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		// CreateTable.addTable(tableName);
		System.out.println("抓取完毕,正在存库");
		try {
			for (AgencyEntity agency : agencys) {
				MybatisTool.save(agency);
			}
		} finally {
			// Close even when a save fails part-way through.
			MybatisTool.close();
		}
		System.out.println("请查看数据库");
	}

}
